/* $Id: dtlb_base.S,v 1.17 2001/10/11 22:33:52 davem Exp $
 * dtlb_base.S:	Front end to DTLB miss replacement strategy.
 *              This is included directly into the trap table.
 *
 * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
 * Copyright (C) 1997,1998 Jakub Jelinek   (jj@ultra.linux.cz)
 */

#include <asm/pgtable.h>
#include <asm/mmu_context.h>

/* %g1	TLB_SFSR	(%g1 + %g1 == TLB_TAG_ACCESS)
 * %g2	(KERN_HIGHBITS | KERN_LOWBITS)
 * %g3  VPTE base	(0xfffffffe00000000)	Spitfire/Blackbird (44-bit VA space)
 *			(0xffe0000000000000)	Cheetah		   (64-bit VA space)
 * %g7	__pa(current->mm->pgd)
 *
 * The VPTE base value is completely magic, but note that
 * few places in the kernel other than these TLB miss
 * handlers know anything about the VPTE mechanism or
 * how it works (see VPTE_SIZE, TASK_SIZE and PTRS_PER_PGD).
 * Consider the 44-bit VADDR Ultra-I/II case as an example:
 *
 * VA[0 :  (1<<43)] produce VPTE index [%g3                        :   0]
 * VA[0 : -(1<<43)] produce VPTE index [%g3-(1<<(43-PAGE_SHIFT+3)) : %g3]
 *
 * For Cheetah's 64-bit VADDR space this is:
 *
 * VA[0 :  (1<<63)] produce VPTE index [%g3                        :   0]
 * VA[0 : -(1<<63)] produce VPTE index [%g3-(1<<(63-PAGE_SHIFT+3)) : %g3]
 *
 * If you're paying attention you'll notice that this means half of
 * the VPTE table is above %g3 and half is below, low VA addresses
 * map progressively upwards from %g3, and high VA addresses map
 * progressively upwards towards %g3.  This trick was needed to make
 * the same 8-instruction handler work for both Spitfire/Blackbird's
 * peculiar VA space hole configuration and Cheetah's full 64-bit
 * VA space.
 */
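
/* A minimal C sketch of what the fast path below computes, written
 * for the generic PAGE_SHIFT case (comment only, nothing here is
 * compiled; vpte_slot is a made-up name):
 *
 *	static unsigned long vpte_slot(unsigned long vpte_base, long va)
 *	{
 *		long off = (va >> PAGE_SHIFT) << 3;	// srax, then sllx
 *		return vpte_base + off;			// ldxa [%g3 + %g6]
 *	}
 *
 * The arithmetic shift is what makes the magic base work: it
 * sign-extends, so kernel (negative) VAs produce negative offsets
 * and index below %g3, while user VAs index upwards from %g3 and
 * wrap modulo 2^64 exactly as the tables above show.
 */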

/* Ways we can get here:
 *
 * 1) Nucleus loads and stores to/from PA-->VA direct mappings.
 * 2) Nucleus loads and stores to/from vmalloc() areas.
 * 3) User loads and stores.
 * 4) User space accesses by nucleus at tl0.
 */
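
/* The andcc test below sorts these out: misses tagged with the
 * Nucleus context (context bits zero, cases 1 and 2) branch to 3f,
 * where a negative VA gets a direct-map PTE built by a single xor
 * and anything else is sent to kvmap; misses carrying a user context
 * take the one-load VPTE fast path and drop into longpath on a real
 * fault.
 */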

#if PAGE_SHIFT == 13
/*
 * To compute vpte offset, we need to do ((addr >> 13) << 3),
 * which can be optimized to (addr >> 10) if bits 10/11/12 can
 * be guaranteed to be 0 ... mmu_context.h does guarantee this
 * by only using 10 bits in the hwcontext value.
 */
#define CREATE_VPTE_OFFSET1(r1, r2)
#define CREATE_VPTE_OFFSET2(r1, r2) \
				srax	r1, 10, r2
#define CREATE_VPTE_NOP		nop
#else
#define CREATE_VPTE_OFFSET1(r1, r2) \
				srax	r1, PAGE_SHIFT, r2
#define CREATE_VPTE_OFFSET2(r1, r2) \
				sllx	r2, 3, r2
#define CREATE_VPTE_NOP
#endif
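
/* An illustrative expansion of the identity used above (comment only,
 * nothing here is assembled): TAG_ACCESS is VA[63:13] | context[12:0],
 * and with the hwcontext limited to 10 bits, tag bits 12:10 are zero,
 * so
 *
 *	long exact = (tag >> 13) << 3;	// srax + sllx, generic case
 *	long fast  = tag >> 10;		// the single srax above
 *
 * differ only in bits taken from tag[12:10] and are therefore equal.
 * CREATE_VPTE_NOP pads the handler back out so it stays exactly 32
 * instructions (four I-cache lines) in either configuration.
 */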

/* DTLB ** ICACHE line 1: Quick user TLB misses		*/
	ldxa		[%g1 + %g1] ASI_DMMU, %g4	! Get TAG_ACCESS
	andcc		%g4, TAG_CONTEXT_BITS, %g0	! From Nucleus?
from_tl1_trap:
	rdpr		%tl, %g5			! For TL==3 test
	CREATE_VPTE_OFFSET1(%g4, %g6)			! Create VPTE offset
	be,pn		%xcc, 3f			! Yep, special processing
	 CREATE_VPTE_OFFSET2(%g4, %g6)			! Create VPTE offset
	cmp		%g5, 4				! Last trap level?
	be,pn		%xcc, longpath			! Yep, cannot risk VPTE miss
	 nop						! delay slot
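	/* If the miss came from TL==3, we are now executing at TL==4
	 * and the VPTE lookup itself could miss; that nested trap
	 * cannot be risked at this depth, so the %tl == 4 test above
	 * bails straight out to longpath instead.
	 */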

/* DTLB ** ICACHE line 2: User finish + quick kernel TLB misses	*/
	ldxa		[%g3 + %g6] ASI_S, %g5		! Load VPTE
1:	brgez,pn	%g5, longpath			! Invalid, branch out
	 nop						! Delay-slot
9:	stxa		%g5, [%g0] ASI_DTLB_DATA_IN	! Reload TLB
	retry						! Trap return
3:	brlz,pt		%g4, 9b				! Kernel virtual map?
	 xor		%g2, %g4, %g5			! Finish bit twiddles
	ba,a,pt		%xcc, kvmap			! Yep, go check for obp/vmalloc
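
/* The xor in the delay slot above is the entire PTE construction for
 * the direct-mapped case: %g2 holds (KERN_HIGHBITS | KERN_LOWBITS),
 * and KERN_HIGHBITS (roughly (_PAGE_VALID | _PAGE_SZ4MB) xor'd with
 * the direct map's high VA bits) is built so that one xor with the
 * faulting VA simultaneously cancels those VA bits and switches on
 * the valid, size and protection bits, leaving a 4MB PTE that the
 * stxa at 9: feeds straight into the TLB.
 */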

/* DTLB ** ICACHE line 3: winfixups+real_faults		*/
longpath:
	rdpr		%pstate, %g5			! Move into alternate globals
	wrpr		%g5, PSTATE_AG|PSTATE_MG, %pstate
	rdpr		%tl, %g4			! See where we came from.
	cmp		%g4, 1				! Is etrap/rtrap window fault?
	mov		TLB_TAG_ACCESS, %g4		! Prepare for fault processing
	ldxa		[%g4] ASI_DMMU, %g5		! Load faulting VA page
	be,pt		%xcc, sparc64_realfault_common	! Jump to normal fault handling
	 mov		FAULT_CODE_DTLB, %g4		! It was read from DTLB

/* DTLB ** ICACHE line 4: Unused...	*/
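	/* We only fall through to here when the branch above is not
	 * taken, i.e. TL > 1: an etrap/rtrap window spill or fill
	 * touched user space, so this must be handled by the window
	 * fixup code rather than the normal fault path.
	 */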
	ba,a,pt		%xcc, winfix_trampoline		! Call window fixup code
	nop
	nop
	nop
	nop
	nop
	nop
	CREATE_VPTE_NOP

#undef CREATE_VPTE_OFFSET1
#undef CREATE_VPTE_OFFSET2
#undef CREATE_VPTE_NOP
